from __future__ import unicode_literals
import requests
from bs4 import BeautifulSoup
import time
import json
# Scraper configuration and module-level accumulator state.
base_url = 'https://www.647aa.com'   # site root, prepended to relative article links
file_name = '870aa_katong.json'      # output file for the collected JSON dump
piccontents = []                     # accumulated [text, html] pairs, one per article
count = num = 0                      # count: list pages since last flush; num: articles scraped
# Crawl list pages 1..292. For each list page, follow every article link and
# append the picture-content div's [plain text, raw HTML] to `piccontents`.
# Progress is flushed to disk roughly every 100 successfully fetched list pages
# so a crash mid-run loses little work.
for index in range(1, 293):
    url = base_url + '/htm/piclist8/' + str(index) + '.htm'
    try:
        r = requests.get(url, timeout=5)
        time.sleep(1)  # throttle: be polite to the server
        count = count + 1
        soup = BeautifulSoup(r.content, 'lxml')
        a_lists = soup.find('ul', attrs={'class': 'textList'}).find_all('li')
        # The first 3 <li> entries are navigation/boilerplate — skip them.
        for item in a_lists[3:]:
            # Bug fix: define n_url before the try so the except handler never
            # prints a stale URL from a previous iteration (or raises NameError
            # on the first one) when the link extraction itself fails.
            n_url = None
            try:
                n_url = base_url + item.find('a')['href']
                rr = requests.get(n_url, timeout=5)
                ct = BeautifulSoup(rr.content, 'lxml')
                ccc = ct.find('div', attrs={'class': 'picContent'})
                # Store both the readable text and the raw HTML of the div.
                piccontents.append([ccc.text, str(ccc)])
                num = num + 1
            except Exception as e:
                # Best-effort scraping: log the failing URL and keep going.
                print(n_url)
                print(e)
            finally:
                time.sleep(1)       # throttle between article fetches
                print(num, end='\r')  # in-place progress counter

        # Periodic checkpoint: rewrite the whole accumulated dump to disk.
        if count > 100:
            print('start write to disk...' + str(num))  # typo fix: was 'strat'
            with open(file_name, mode='w', encoding='utf-8') as file:
                file.write(json.dumps(piccontents, ensure_ascii=False))
            count = 0

    except Exception as e:
        # A failed list page (network error, or the expected <ul class="textList">
        # missing, which raises AttributeError on .find_all) is skipped entirely.
        print(e)
        print(url)
# Final flush: persist everything collected, overwriting any partial checkpoint.
print('start write to disk...' + str(num))  # typo fix: was 'strat'
# 'w' instead of 'w+': the file is only written, never read back here.
with open(file_name, mode='w', encoding='utf-8') as file:
    file.write(json.dumps(piccontents, ensure_ascii=False))